{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#|default_exp models.gMLP"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# gMLP"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is an unofficial PyTorch implementation based on:\n",
    "\n",
    "* Liu, H., Dai, Z., So, D. R., & Le, Q. V. (2021). <span style=\"color:dodgerblue\">**Pay Attention to MLPs**</span>. arXiv preprint arXiv:2105.08050.\n",
    "\n",
    "* Cholakov, R., & Kolev, T. (2022). <span style=\"color:dodgerblue\">**The GatedTabTransformer. An enhanced deep\n",
    "learning architecture for tabular modeling**</span>. arXiv preprint arXiv:2201.00199."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#|export\n",
    "from tsai.imports import *\n",
    "from tsai.models.layers import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#|export\n",
    "class _SpatialGatingUnit(nn.Module):\n",
    "    def __init__(self, d_ffn, seq_len):\n",
    "        super().__init__()\n",
    "        self.norm = nn.LayerNorm(d_ffn)\n",
    "        self.spatial_proj = nn.Conv1d(seq_len, seq_len, kernel_size=1)\n",
    "        nn.init.constant_(self.spatial_proj.bias, 1.0)\n",
    "        nn.init.normal_(self.spatial_proj.weight, std=1e-6)\n",
    "\n",
    "    def forward(self, x):\n",
    "        u, v = x.chunk(2, dim=-1)\n",
    "        v = self.norm(v)\n",
    "        v = self.spatial_proj(v)\n",
    "        out = u * v\n",
    "        return out\n",
    "\n",
    "\n",
    "class _gMLPBlock(nn.Module):\n",
    "    def __init__(self, d_model, d_ffn, seq_len):\n",
    "        super().__init__()\n",
    "        self.norm = nn.LayerNorm(d_model)\n",
    "        self.channel_proj1 = nn.Linear(d_model, d_ffn * 2)\n",
    "        self.channel_proj2 = nn.Linear(d_ffn, d_model)\n",
    "        self.sgu = _SpatialGatingUnit(d_ffn, seq_len)\n",
    "\n",
    "    def forward(self, x):\n",
    "        residual = x\n",
    "        x = self.norm(x)\n",
    "        x = F.gelu(self.channel_proj1(x))\n",
    "        x = self.sgu(x)\n",
    "        x = self.channel_proj2(x)\n",
    "        out = x + residual\n",
    "        return out\n",
    "\n",
    "\n",
    "class _gMLPBackbone(nn.Module):\n",
    "    def __init__(self, d_model=256, d_ffn=512, seq_len=256, depth=6):\n",
    "        super().__init__()\n",
    "        self.model = nn.Sequential(\n",
    "            *[_gMLPBlock(d_model, d_ffn, seq_len) for _ in range(depth)]\n",
    "        )\n",
    "\n",
    "    def forward(self, x):\n",
    "        return self.model(x)\n",
    "\n",
    "\n",
    "class gMLP(_gMLPBackbone):\n",
    "    def __init__(\n",
    "        self,\n",
    "        c_in,\n",
    "        c_out,\n",
    "        seq_len,\n",
    "        patch_size=1,\n",
    "        d_model=256,\n",
    "        d_ffn=512,\n",
    "        depth=6,\n",
    "    ):\n",
    "        assert seq_len % patch_size == 0, \"`seq_len` must be divisibe by `patch_size`\"\n",
    "        super().__init__(d_model, d_ffn, seq_len // patch_size, depth)\n",
    "        self.patcher = nn.Conv1d(\n",
    "            c_in, d_model, kernel_size=patch_size, stride=patch_size\n",
    "        )\n",
    "        self.head = nn.Linear(d_model, c_out)\n",
    "\n",
    "    def forward(self, x):\n",
    "        patches = self.patcher(x)\n",
    "        batch_size, num_channels, _ = patches.shape\n",
    "        patches = patches.permute(0, 2, 1)\n",
    "        patches = patches.view(batch_size, -1, num_channels)\n",
    "        embedding = self.model(patches)\n",
    "        embedding = embedding.mean(dim=1)\n",
    "        out = self.head(embedding)\n",
    "        return out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bs = 16\n",
    "c_in = 3\n",
    "c_out = 2\n",
    "seq_len = 64\n",
    "patch_size = 4\n",
    "xb = torch.rand(bs, c_in, seq_len)\n",
    "model = gMLP(c_in, c_out, seq_len, patch_size=patch_size)\n",
    "test_eq(model(xb).shape, (bs, c_out))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/javascript": "IPython.notebook.save_checkpoint();",
      "text/plain": [
       "<IPython.core.display.Javascript object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Users/nacho/notebooks/tsai/nbs/103d_models.gMLP.ipynb saved at 2022-11-09 13:01:47\n",
      "Correct notebook to script conversion! 😃\n",
      "Wednesday 09/11/22 13:01:49 CET\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "                <audio  controls=\"controls\" autoplay=\"autoplay\">\n",
       "                    <source src=\"data:audio/wav;base64,UklGRvQHAABXQVZFZm10IBAAAAABAAEAECcAACBOAAACABAAZGF0YdAHAAAAAPF/iPh/gOoOon6w6ayCoR2ZeyfbjobxK+F2Hs0XjKc5i3DGvzaTlEaraE+zz5uLUl9f46fHpWJdxVSrnfmw8mYEScqUP70cb0Q8X41uysJ1si6Eh1jYzXp9IE2DzOYsftYRyoCY9dJ/8QICgIcEun8D9PmAaBPlfT7lq4MFIlh61tYPiCswIHX+yBaOqT1QbuW7qpVQSv9lu6+xnvRVSlyopAypbGBTUdSalrSTaUBFYpInwUpxOzhti5TOdndyKhCGrdwAfBUcXIJB69p+Vw1egB76+n9q/h6ADglbf4LvnIHfF/981ODThF4m8HiS0riJVjQ6c+/EOZCYQfJrGrhBmPVNMmNArLKhQlkXWYqhbaxXY8ZNHphLuBJsZUEckCTFVHMgNKGJytIDeSUmw4QN4Qx9pReTgb3vYX/TCBuApf75f+P5Y4CRDdN+B+tngk8c8nt03CKGqipgd13OhotwOC5x9MCAknFFcmlmtPmagFFFYOCo0qRzXMhVi57pryNmIEqJlRi8bm52PfuNM8k4dfQv+4cO12l6zCGdg3jl730uE/KAPvS+f0wEAoAsA89/XfXQgBESIn6S5luDtiC8eh/YmIfpLqt1OMp5jXg8/24MveqUNUnPZsqw0Z3yVDldnaUOqIZfXlKrm36zzWhjRhaT+r+ncHI5/otUzfd2uSt7hl/bqXtoHaCC6+mqfrAOeoDD+PJ/xf8RgLMHfH/b8GeBihZIfSXidoQSJWB52NM1iRkzz3MkxpKPbUCrbDu5d5fgTAxkSK3JoEhYD1p2omere2LZTuqYLbdWa49Cx5Dww7tyXDUnioXRkHhwJyKFvd/AfPoYy4Fl7j1/LQorgEr9/X89+0qAOAwAf13sJoL8Gkd8wt25hWIp3Heez/eKODfPcSPCzpFNRDVqf7UlmnNQKGHgqd+jgVvJVm2f265QZTpLS5byur1tpT6ajvrHq3Q2MXWIxtUCehoj8YMk5LB9hRQegeTypn+nBQWA0QHgf7f2q4C5EFt+5ucOg2YfHXtq2SSHpS0ydnTL4IxFO6pvNb4ulBdInWfcsfSc7VMmXpSmE6eeXmZThJxpsgRohEfOk86+AHCoOpOMFsx1dv8s6oYT2k17uR7ngpXod34IEJqAaPfnfyABCIBZBpl/NPI2gTQVjX134x2ExSPMeR7VtYjZMWJ0W8ftjkA/YW1durCWykvjZFKu4p9LVwVbZKNkqpxh6U+6mRC2mGq2Q3SRvsIgcpc2sIpD0Bp4uiiFhW3ecXxOGgaCDe0Vf4cLPoDv+/5/mfw1gN4KKX+17emBqBmYfBHfVYUZKFR44NBtiv41bHJUwx+RJkP1apu2VJlkTwli4qrwoo1ax1dToNCtemRSTBGXz7kJbdM/PY/Dxht0dTLziH7Ul3loJEiE0uJsfdsVTYGL8Yt/AgcMgHYA7X8S+IqAYA+QfjzpxIIVHnp7tdqzhmAstXaxzEqMETpScGC/dJP3Rmdo8LIZnOVSEF+Opxumsl1sVF+dVrE5Z6NIiZSkvVdv2zsqjdnK8HVDLlyHyNjuegogM4NA5z9+YRG9gA722H97AgOA/gSyf43zCIHdE899yuTIg3ciNXpm1jmImTDwdJPITI4RPhRugbvslbFKt2Vfr/6eTFb4W1WkY6m6YPdQjJr2tNZp3EQlko7BgXHRNz2LAc+gdwMq7IUf3R58ohtFgrbr6n7hDFWAlPr8f/T9I4CECU9/De+vgVQY5nxh4POEzybJeCTS5YnCNAZzhsRzkP1Bsmu4t4aYU07nYuerA6KWWcJYO6HHrKJjaE3Zl624UWz/QOOPjcWHc7QzdIk40yl5tCWjhIDhJX0xF4CBMvBsf10IF4Ac//Z/bPlsgAcOwn6S6n6CwxzUewLcRoYaKzV38M23i9o493CNwL6S1UUuaQe0QpvbUfdfiqglpcRccFU+nkWwambASUiVfLyqbg49xY2eyWh1hy/Sh37XjHpaIYKD7OUEfrgS5IC09MV/1gMBgKMDyH/n9N6AhhINfh7mdoMoIZt6r9fAh1cvfHXNya6N4DzDbqi8K5WWSYlmbbAdnkpV6FxJpWSo1V8DUmGb3rMRaQBG2JJgwN9wCDnNi8HNI3dKK1aG0dvHe/UciIJf6rt+Og5wgDn59X9P/xWAKQhxf2XweYH+FjB9suGVhIMlOnlo02GJhTOdc7vFyo/TQGxs2Li7lz9NwmPurBihnVi7WSWiwKvGYntOpJiOt5drKUKMkFnE8HLxNPmJ9NG4eP8mAYUv4Np8hhi3gdruSX+3CSWAwP38f8f6UoCuDPF+6Os8gnAbKnxQ3d2F0imydzDPKIuiN5lxu8EKkrFE82kftW2az1DbYImpMqTUW3FWIJ83r5hl2koJlla7+m0+PmSOZcjcdMgwS4g11iZ6qCLUg5jkxn0QFA6BWvOvfzEFBIBHAtp/Qfa3gC4RSH5y5yeD2B/8evnYS4cULgR2CMsUja47cG/QvW6UeEhXZ3+xP51GVNVdP6Zpp+1eDFM5nMeySWghR4+TNL85cD46YIyCzKJ2kCzEhoTabXtGHs+CCemJfpMPjoDe9+t/qQALgM8Gj3++8UaBqRV2fQTjO4Q3JKd5r9TgiEYyMHTxxiWPpz8jbfq585YpTJpk960xoKFXsVoTo7yq6GGMTw==\" type=\"audio/wav\" />\n",
       "                    Your browser does not support the audio element.\n",
       "                </audio>\n",
       "              "
      ],
      "text/plain": [
       "<IPython.lib.display.Audio object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#|eval: false\n",
    "#|hide\n",
    "from tsai.export import get_nb_name; nb_name = get_nb_name(locals())\n",
    "from tsai.imports import create_scripts; create_scripts(nb_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
